creating ranks

the ranks will be a named num, with entrezgene_id as name and stat (Wald Test) as metric

getRanks <- function(res, annot) {
  # only taking genes which have entrezgene_ids assigned to them
  genes_with_entrez <- select(annot, GeneID, entrezgene_id) %>% 
    filter(!is.na(entrezgene_id))
  
  ranks <- as.data.frame(res) %>%
    tibble::rownames_to_column("GeneID") %>%
    merge(genes_with_entrez, by = "GeneID") %>%
    arrange(desc(stat)) %>% 
    select(entrezgene_id, stat) %>% 
    tibble::deframe() # creating a named num from two columns
  return(ranks)
}

ranks.gastroc <- getRanks(res.gastroc, annot)
ranks.soleus <- getRanks(res.soleus, annot)
# TODO: why (again) is the soleus gene count seemingly 300 below soleus gene count / ranks count
# -> seems because of e.g. the cutoff at DESeq

ranks distribution

duplicate entrezgene_ids

duplicate entrezgene ids (multiple entrez are mapped to the same gene name)

Thus a quick look if any of these genes can be simply omitted. Like if the gene_type is “other” or “tRNA”

Since all ENSEMBL duplicates are also found in the gene_name duplicates, using the ENSEMBL as id for the ranks would reduce the number of duplicates by 107.

actually most of them are “protein coding” and will not be omitted. to be continued …

loading pathways

Pathways are provided by http://www.gsea-msigdb.org/gsea/msigdb/mouse/collections.jsp

For now the Canonical pathways are used. These gene sets represent biological a biological process. They are composed from the following databases taking a subset of CP:

database gene sets
BioCarta 252
Reactome 1249
WikiPathways 186

applying fgsea

fgseaRes <- fgsea(
  pathways = CGP,
  stats    = ranks,
  minSize  = 15,
  maxSize  = 200
)

most differential regulated pathways, both tissues

using NES from the fgsea result filtering on the set `pCutoff=`0.01 yields the following plot:

pCutoff <- params$pCutoff
fgseaRes.combined <- merge(
  data.frame(fgseaRes.gastroc[, c("pathway", "NES", "padj")]),
  data.frame(fgseaRes.soleus[, c("pathway", "NES", "padj")]),
  by = "pathway",
  suffixes = c(".ga", ".sol")
) %>%
  filter(padj.ga < pCutoff | padj.sol < pCutoff) %>%
  mutate(
    diff.exp = case_when(
      NES.ga  < 0 & NES.sol < 0 & padj.ga < pCutoff & padj.sol < pCutoff ~ "both down",
      NES.ga  > 0 & NES.sol > 0 & padj.ga < pCutoff & padj.sol < pCutoff ~ "both up",
      NES.ga  < 0 & NES.sol > 0 & padj.ga < pCutoff & padj.sol < pCutoff ~ "gastrocnemius down, soleus up",
      NES.ga  > 0 & NES.sol < 0 & padj.ga < pCutoff & padj.sol < pCutoff ~ "gastrocnemius up, soleus down",
                                  padj.ga < pCutoff & padj.sol > pCutoff ~ "only gastrocnemius",
      # NES.ga  > 0 &               padj.ga < pCutoff & padj.sol > pCutoff ~ "ga up",
                                  padj.ga > pCutoff & padj.sol < pCutoff ~ "only soleus",
      # NES.sol > 0 &               padj.ga > pCutoff & padj.sol < pCutoff ~ "sol up",
      TRUE ~ "different"
    )
  )

# final plot
p <- ggplot(fgseaRes.combined, aes(x = NES.ga, y = NES.sol, text=pathway)) +
  geom_vline(xintercept = 0) + 
  geom_hline(yintercept = 0) + 
  geom_point(aes(color = diff.exp)) +
  # scale_color_manual(values = c("red", "chartreuse1", "bisque", "royalblue")) +
  labs(x = "gastrocnemius", y = "soleus", color = "significantly\ndifferentially\nexpressed") +
  # ggrepel::geom_label_repel(max.overlaps = 20) + 
  ggtitle(label = "Normalized Enrichment Score") +
  theme_bw()

ggsave(filename = file.path("plots", "04_gsea_scatter.svg"), p)
## Saving 7 x 5 in image
p

# interactive:
# plotly::ggplotly(p, tooltip = "all")

barplot

p <- ggplot(fgseaRes.combined, aes(x = diff.exp)) +
  geom_bar(aes(fill = diff.exp)) +
  theme_bw()

ggsave(filename = file.path("plots", "04_gsea_scatter_barplot.svg"), p)
## Saving 7 x 5 in image
p

Venn/Euler-Diagram

venn.colors <- c(params$tissue_pal, scales::hue_pal()(4))

sign_geneSets_gastroc <-
  data.frame(fgseaRes.gastroc[, c("pathway", "padj")]) %>%
  filter(padj < params$pCutoff) %>% 
  pull(pathway)


sign_geneSets_soleus <-
  data.frame(fgseaRes.soleus[, c("pathway", "padj")]) %>%
  filter(padj < params$pCutoff) %>% 
  pull(pathway)

# only the two tissues
gene_sets <- list(
  "gastroc" = sign_geneSets_gastroc,
  "soleus" = sign_geneSets_soleus#,
  # "gastroc&soleus" = sign_gene_stats$shared_sig_genes
)

# 1st option: euler two tissues
p <- plot(
  euler(gene_sets),
  quantities = T,
  legend = list(side = "right"),
  fills = venn.colors,
  main = "significant gene sets (GSEA)"
)
ggsave(filename = file.path("plots", "04_euler.svg"), p)
## Saving 7 x 5 in image
p

# 2nd option: venn two tissues
library(eulerr)
p <- plot(
  eulerr::venn(gene_sets),
  fills = params$tissue_pal,
  main = "significant gene sets (GSEA)"
)
ggsave(filename = file.path("plots", "04_venn.svg"), p)
## Saving 7 x 5 in image
p

dotplot

Note: Different to most other plots, the dotplots seem to be created best by running the chunks manually. This results in automatic axis scaling with more entries, so that the labels do not overlap (which is curiously not the case if remotely knitting)!

dotplot: “both upregulated”

only 7 gene sets => fits well in the plot

group = "both up"
p_combined <- combined_dotplot(group, fgseaRes.combined)

ggsave(filename = file.path("plots", paste0("04_gsea_dotplot_", group, ".svg")), p_combined)
## Saving 7 x 5 in image
p_combined

dotplot: “both down”

gets adjusted by the function well. The .svg file looks okay

group = "both down"
p_combined <- combined_dotplot(group, fgseaRes.combined)

 ggsave(filename = file.path("plots", paste0("04_gsea_dotplot_", group, ".svg")), p_combined)
## Saving 7 x 5 in image
p_combined

dotplot: “only gastrocnemius”

gets adjusted by the function well. The .svg file looks okay

group = "only gastrocnemius"
p_combined <- combined_dotplot(group, fgseaRes.combined, max_label_length = 100)

ggsave(filename = file.path("plots", paste0("04_gsea_dotplot_", group, ".svg")), p_combined)
## Saving 7 x 5 in image
p_combined

group = "only gastrocnemius"
pathways <- filter(fgseaRes.combined, diff.exp == group)$pathway

gastroc.df <-
  filter(fgseaRes.gastroc, pathway %in% pathways) %>%
  arrange(NES)

p.gastroc <- fgsea_dotplot(gastroc.df, max_label_length = 100)
ggsave(filename = file.path("plots", paste0("04_gsea_dotplot_", group, "_single.svg")), p.gastroc)
## Saving 7 x 5 in image
p.gastroc

dotplot: “only soleus”

gets adjusted by the function well. The .svg file looks okay

group = "only soleus"
p_combined <- combined_dotplot(group, fgseaRes.combined, max_label_length = 100, sort_by_gastroc = F)

ggsave(filename = file.path("plots", paste0("04_gsea_dotplot_", group, ".svg")), p_combined)
## Saving 7 x 5 in image
p_combined

group = "only soleus"
pathways <- filter(fgseaRes.combined, diff.exp == group)$pathway

soleus.df <-
  filter(fgseaRes.soleus, pathway %in% pathways) %>%
  arrange(NES)

p.soleus <-
  fgsea_dotplot(soleus.df, max_label_length = 100)
ggsave(filename = file.path("plots", paste0("04_gsea_dotplot_", group, "_single.svg")), p.soleus)
## Saving 7 x 5 in image
p.soleus

dotplot: top20

Enrichment score plot

ordering pathways by padj values and using ES to

# ' obtain top pathways ordered by padj and use `ES` for up or down regulation
get_top_pathways <- function(fgseaRes, up = TRUE, pCutoff=params$pCutoff, n=10) {
  .updown <- ifelse(up, `>`, `<`)
  
  top.pathways <- fgseaRes %>%
    filter(.updown(ES,0), padj < pCutoff) %>%
    arrange(padj) %>% 
    slice_head(n=n)
  
 return(top.pathways) 
}

plot for top up and down regulated pathways

# ' plots top n enrichment plots for the given fgsea result
plot_top_enrichment <- function(fgseaRes, pathways, ranks, n = 9, up = TRUE) {
  # extracting the top n pathways
  top.pathways <- get_top_pathways(fgseaRes, up=up, pCutoff=params$pCutoff, n=n)
  
  plot.list <- list()
  # lims <- list("x" = c(0,17000), "y" = c(-0.8,0.0))
  
  for (i in 1:nrow(top.pathways)) {
    # filling plot.list with enrichmentPlots 
    # TODO: how can I use facet_wrap for this?
    pathway <- top.pathways[i]$pathway
    plt <- plotEnrichment(pathways[[pathway]], ranks) +
      # TODO: adjust yaxis to the same scale
      # TODO: keep axis.text.x only on the lower row
      # TODO: keep axis.text.y only on the right column
      theme(
        axis.title.x = element_blank(),
        axis.title.y = element_blank()
      ) # +
      # coord_cartesian(xlim = lims$x, ylim = lims$y)
    plot.list[[i]] <- plt
  }
    
  arrange_plts(plot.list)
}

# ' helper function to arragen the plot from the enrichment
arrange_plts <- function(plt.list) {
  nplts <- length(plt.list)
  plt <- plt.list[1]
  xlab <- plt$labels$x
  ylab <- plt$labels$y
  
  # set axis to the same scale
  lims <- list("x" = c(0, 17000), "y" = c(-0.8, 0.0))
  
  # remove axis
  
  # arrange the plots
  fig_labels <- LETTERS[1:nplts]
  
  patchwork::wrap_plots(plt.list, )
  
  figure <- ggpubr::ggarrange(plotlist = plt.list,
                              labels = fig_labels) %>%
    annotate_figure(left = text_grob(ylab, rot = 90),
                    bottom = text_grob(xlab))
 
  # TODO: remove all x-axis labels except lower row
  # get dimensions
  figure$layers
   
  return(figure)
}



# plot_labels <-
#     data.frame("label" = LETTERS[1:10], "pathway" = top.pathways)
# knitr::kable(caption = "plot labels", plot_labels)

gastroc up

plot_top_enrichment(fgseaRes.gastroc, CGP, ranks.gastroc, up=T)

# TODO: add plot labels to return argument of plot_top_enrichment (use list probably)
plot_labels <-
    data.frame("label" = LETTERS[1:9], "pathway" = get_top_pathways(fgseaRes.gastroc, up=T, n=9)$pathway)
knitr::kable(caption = "plot labels", plot_labels)
plot labels
label pathway
A WP_TYROBP_CAUSAL_NETWORK_IN_MICROGLIA
B WP_MICROGLIA_PATHOGEN_PHAGOCYTOSIS_PATHWAY
C WP_APOPTOSIS
D REACTOME_IMMUNOREGULATORY_INTERACTIONS_BETWEEN_A_LYMPHOID_AND_A_NON_LYMPHOID_CELL
E WP_FIBRIN_COMPLEMENT_RECEPTOR_3_SIGNALING_PATHWAY
F WP_CHEMOKINE_SIGNALING_PATHWAY
G BIOCARTA_TNFR2_PATHWAY
H REACTOME_DAP12_INTERACTIONS
I REACTOME_FCGAMMA_RECEPTOR_FCGR_DEPENDENT_PHAGOCYTOSIS

gastroc down

plot_top_enrichment(fgseaRes.gastroc, CGP, ranks.gastroc, up=F)

plot_labels <-
    data.frame("label" = LETTERS[1:9], "pathway" = get_top_pathways(fgseaRes.gastroc, up=F, n=9)$pathway)
knitr::kable(caption = "plot labels", plot_labels)
plot labels
label pathway
A REACTOME_THE_CITRIC_ACID_TCA_CYCLE_AND_RESPIRATORY_ELECTRON_TRANSPORT
B REACTOME_RESPIRATORY_ELECTRON_TRANSPORT_ATP_SYNTHESIS_BY_CHEMIOSMOTIC_COUPLING_AND_HEAT_PRODUCTION_BY_UNCOUPLING_PROTEINS
C REACTOME_RESPIRATORY_ELECTRON_TRANSPORT
D WP_ELECTRON_TRANSPORT_CHAIN
E REACTOME_COMPLEX_I_BIOGENESIS
F REACTOME_MITOCHONDRIAL_TRANSLATION
G REACTOME_KEAP1_NFE2L2_PATHWAY
H REACTOME_CELLULAR_RESPONSE_TO_HYPOXIA
I REACTOME_CELLULAR_RESPONSE_TO_CHEMICAL_STRESS

soleus up

plot_top_enrichment(fgseaRes.soleus, CGP, ranks.soleus, up=T)

plot_labels <-
    data.frame("label" = LETTERS[1:9], "pathway" = get_top_pathways(fgseaRes.soleus, up=T, n=9)$pathway)
knitr::kable(caption = "plot labels", plot_labels)
plot labels
label pathway
A REACTOME_SRP_DEPENDENT_COTRANSLATIONAL_PROTEIN_TARGETING_TO_MEMBRANE
B REACTOME_FORMATION_OF_A_POOL_OF_FREE_40S_SUBUNITS
C REACTOME_NONSENSE_MEDIATED_DECAY_NMD_INDEPENDENT_OF_THE_EXON_JUNCTION_COMPLEX_EJC
D WP_CYTOPLASMIC_RIBOSOMAL_PROTEINS
E WP_TYROBP_CAUSAL_NETWORK_IN_MICROGLIA
F REACTOME_EUKARYOTIC_TRANSLATION_INITIATION
G REACTOME_NONSENSE_MEDIATED_DECAY_NMD
H REACTOME_MAJOR_PATHWAY_OF_RRNA_PROCESSING_IN_THE_NUCLEOLUS_AND_CYTOSOL
I REACTOME_PRC2_METHYLATES_HISTONES_AND_DNA

soleus down

plot_top_enrichment(fgseaRes.soleus, CGP, ranks.soleus, up=F)

plot_labels <-
    data.frame("label" = LETTERS[1:9], "pathway" = get_top_pathways(fgseaRes.soleus, up=F, n=9)$pathway)
knitr::kable(caption = "plot labels", plot_labels)
plot labels
label pathway
A REACTOME_KEAP1_NFE2L2_PATHWAY
B REACTOME_CELLULAR_RESPONSE_TO_CHEMICAL_STRESS
C REACTOME_GLI3_IS_PROCESSED_TO_GLI3R_BY_THE_PROTEASOME
D REACTOME_UBIQUITIN_MEDIATED_DEGRADATION_OF_PHOSPHORYLATED_CDC25A
E REACTOME_RUNX1_REGULATES_TRANSCRIPTION_OF_GENES_INVOLVED_IN_DIFFERENTIATION_OF_HSCS
F REACTOME_CELLULAR_RESPONSE_TO_HYPOXIA
G REACTOME_DEGRADATION_OF_DVL
H REACTOME_ABC_FAMILY_PROTEINS_MEDIATED_TRANSPORT
I REACTOME_ASYMMETRIC_LOCALIZATION_OF_PCP_PROTEINS

GSEA table plot

gastroc

top significant pathways:

# creating up and down regulated pathway vectors separately to maintain order

topUp <- get_top_pathways(fgseaRes.gastroc, up=T, pCutoff = params$pCutoff, n=10)
topDown <- get_top_pathways(fgseaRes.gastroc, up=F, pCutoff = params$pCutoff, n=10)
topPathways <- bind_rows(topUp, topDown) %>%
  arrange(-NES) %>%
  pull(pathway)

plotGseaTable(
  pathways = CGP[topPathways],
  stats = ranks.gastroc,
  fgseaRes = fgseaRes.gastroc,
  gseaParam = 0.5,
  render = TRUE
) %>%
  ggpubr::as_ggplot() # needed since, for whatever reason only `NULL` gets returned if `plotGseaTable` is rendered inline

soleus

top significant pathways:

# creating up and down regulated pathway vectors separately to maintain order

topUp <- get_top_pathways(fgseaRes.soleus, up=T, pCutoff = params$pCutoff, n=10)
topDown <- get_top_pathways(fgseaRes.soleus, up=F, pCutoff = params$pCutoff, n=10)
topPathways <- bind_rows(topUp, topDown) %>%
  arrange(-NES) %>%
  pull(pathway)

plotGseaTable(
  pathways = CGP[topPathways],
  stats = ranks.soleus,
  fgseaRes = fgseaRes.soleus,
  gseaParam = 0.5,
  render = TRUE
) %>%
  ggpubr::as_ggplot() # needed since, for whatever reason only `NULL` gets returned if `plotGseaTable` is rendered inline

list of all significant pathways

pathways will be sorted here by the absolute sum of the NES…

both down

fgseaRes.combined %>% 
  filter(diff.exp == "both down") %>% 
  mutate(NES = abs(NES.ga) + abs(NES.sol)) %>% 
  select(pathway, NES) %>% 
  arrange(desc(NES)) %>%
  rmarkdown::paged_table(options = list(cols.min.print = 2))

both up

fgseaRes.combined %>% 
  filter(diff.exp == "both up") %>% 
  mutate(NES = abs(NES.ga) + abs(NES.sol)) %>% 
  select(pathway, NES) %>% 
  arrange(desc(NES)) %>%
  rmarkdown::paged_table(options = list(cols.min.print = 2))

only gastroc

fgseaRes.combined %>% 
  filter(diff.exp == "only gastrocnemius") %>% 
  mutate(NES = abs(NES.ga) + abs(NES.sol)) %>% 
  select(pathway, NES) %>% 
  arrange(desc(NES)) %>%
  rmarkdown::paged_table(options = list(cols.min.print = 2))

only soleus

fgseaRes.combined %>% 
  filter(diff.exp == "only soleus") %>% 
  mutate(NES = abs(NES.ga) + abs(NES.sol)) %>% 
  select(pathway, NES) %>% 
  arrange(desc(NES)) %>%
  rmarkdown::paged_table(options = list(cols.min.print = 2))

currentTODOs